File set-up

Set working directory to current directory

if (rstudioapi::isAvailable()) {
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
}

Load standard libraries and resolve conflicts

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(conflicted)
conflict_prefer("filter", "dplyr")
## [conflicted] Will prefer dplyr::filter over any other package
conflict_prefer("select", "dplyr")
## [conflicted] Will prefer dplyr::select over any other package
conflict_prefer("slice", "dplyr")
## [conflicted] Will prefer dplyr::slice over any other package
conflict_prefer("rename", "dplyr")
## [conflicted] Will prefer dplyr::rename over any other package
conflict_prefer('intersect', 'dplyr')
## [conflicted] Will prefer dplyr::intersect over any other package

Load specific libraries

library(ggseqlogo)

Set figure theme

mytheme = theme_bw(base_size = 10) + 
  theme(text = element_text(size=10, colour='black'),
        title = element_text(size=10, colour='black'),
        line = element_line(size=0.5),
        axis.title = element_text(size=10, colour='black'),
        axis.text = element_text(size=10, colour='black'),
        axis.line = element_line(colour = "black"),
        axis.ticks = element_line(size=0.5),
        strip.background = element_blank(),
        strip.text.y = element_blank(),
        panel.grid = element_blank(),
        panel.border = element_blank(),
        legend.position = c(0.8, 0.8), 
        legend.text=element_text(size=10)) 

mytheme_discrete_x = mytheme + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

# a colorblind-friendly palette 
# colorblind.palette.grey = c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

Read data and order

all_circ = read_tsv('../data/Supplementary_Table_2_all_circRNAs.txt')
## Rows: 1137099 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (15): chr, strand, cell_line, tool, circ_id, circ_id_strand, count_group...
## dbl  (6): start, end, BSJ_count, n_detected, n_db, estim_len_in
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
all_circ$tool = factor(all_circ$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2",  "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE",  "circRNA_finder", "segemehl")) 

all_circ

Figure 2B & Sup Figure 1

all_circ %>% 
  group_by(tool, cell_line, count_group) %>% 
  summarise(n = n()) %>%
  group_by(tool, cell_line) %>% mutate(total_n = sum(n)) %>%
  filter(cell_line == "HLF") %>%
  ggplot(aes(reorder(tool, -total_n), n, fill = count_group)) + 
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = scales::comma_format()) +
  theme(axis.text.x = element_text(angle = 45, hjust=1)) +
  #facet_wrap(~cell_line, nrow = 3) +
  mytheme_discrete_x +
  theme(axis.title.x=element_blank(), legend.title=element_blank()) +
  scale_fill_manual(values = c('#00B9F2', '#E69F00' , '#999999')) +
  ylab("number of circRNAs")
## `summarise()` has grouped output by 'tool', 'cell_line'. You can override using
## the `.groups` argument.

#ggsave('separate_figures/figure_2B.pdf',  width = 10, height = 8.5, units = "cm")
#ggsave('../supplemental/sup_figures/sup_figure_1.pdf',  width = 20, height = 12, units = "cm")

Figure 2C & Sup Figure 8

nr_detected = all_circ %>% 
  group_by(chr, start, end, circ_id, cell_line) %>%
  summarise(n_detected = n()) %>%
  ungroup()
## `summarise()` has grouped output by 'chr', 'start', 'end', 'circ_id'. You can
## override using the `.groups` argument.
nr_detected
n_detected_per_tool = all_circ %>% 
  left_join(nr_detected) %>%
  mutate(n_detected_group = NA,
         n_detected_group = ifelse(n_detected == 1, 'unique', n_detected_group),
         n_detected_group = ifelse(n_detected > 1, '2 tools', n_detected_group),
         n_detected_group = ifelse(n_detected > 2, '2-5 tools', n_detected_group),
         n_detected_group = ifelse(n_detected > 5, '6-9 tools', n_detected_group),
         n_detected_group = ifelse(n_detected > 9, '≥ 10 tools', n_detected_group),
         n_detected_group = ifelse(n_detected == 16, 'all tools', n_detected_group))
## Joining, by = c("chr", "start", "end", "cell_line", "circ_id", "n_detected")
n_detected_per_tool$n_detected_group = factor(n_detected_per_tool$n_detected_group, 
                                              levels = c('all tools', '≥ 10 tools', '6-9 tools', "2-5 tools", '2 tools', 'unique'))

n_detected_per_tool$tool = factor(n_detected_per_tool$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2",  "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE",  "circRNA_finder", "segemehl")) 


n_detected_per_tool %>%
  #filter(cell_line == "HLF") %>%
  ggplot(aes(tool, fill = n_detected_group)) +
  geom_bar() +
  mytheme_discrete_x +
  ylab("number of circRNAs") +
  scale_fill_manual(name = "circRNAs detected by",
                    values = c('#D55E00',"#0072B2", '#00B9F2', '#00A875', "#E69F00", "#CC79A7")) +
  theme(axis.title.x=element_blank()) +
  #facet_wrap(~cell_line) +
  scale_y_continuous(labels = scales::comma_format())

#ggsave('separate_figures/figure_2C.pdf',  width = 10, height = 9, units = "cm")
#ggsave('../supplemental/sup_figures/sup_figure_8.pdf',  width = 21, height = 12, units = "cm")

Figure 2D

circ_ss_list = list()

for (tool_name in all_circ %>% pull(tool) %>% unique()) {
  ss = all_circ %>% filter(tool == tool_name) %>% pull(ss_motif) 
  if (length(ss) > 0 ){
    circ_ss_list[tool_name] = all_circ %>% filter(tool == tool_name) %>% pull(ss_motif) %>% list()
    
  }
}


# Create custom colour scheme
cs1 = make_col_scheme(chars=c('A', 'T', 'C', 'G'), 
                      cols=c('#00A875', '#F15A22', '#0072BC', '#F7941D'))

ggplot() + 
  geom_logo(circ_ss_list, method = 'prob', col_scheme = cs1) + 
  theme_logo() +
  guides(scale = "none") +
  facet_wrap(~seq_group, nrow = 2) +
  theme(axis.text.x=element_blank(),
        text = element_text(size=10, colour='gray30'),
        title = element_text(size=10, colour='gray30'))
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

#ggsave('separate_figures/figure_2D.pdf',  width = 20, height = 4.5, units = "cm")